{
 "cells": [
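  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Install Beautiful Soup (run one of these in a shell).\n",
    "## The PyPI package `bs4` is only a stub that depends on `beautifulsoup4`.\n",
    "## pip install bs4\n",
    "## pip install beautifulsoup4\n",
    "\n",
    "## Install the optional third-party parsers\n",
    "## pip install lxml\n",
    "## pip install html5lib\n"
   ]
  },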
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<!DOCTYPE html>\n",
      "\n",
      "<html lang=\"en\">\n",
      "\n",
      "\t<head>\n",
      "\t\t<meta charset=\"UTF-8\">\n",
      "\n",
      "\t\t<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n",
      "\t\t<meta name=\"keywords\" content=\"Python, Python资讯, Python基础教程, Python学习笔记, Python电子书, Python视频教程, Python编程练习, Python Django, Python Flask, Python Tutorials, Python Video Tutorials Download\">\n",
      "\t\t<meta name=\"description\" content=\"www.spbeen.com是一个关于Python技术栈的教程网站，分教程、项目两类，目前有Python3教程、Xpath教程、基于Python的网络爬虫、基于Python的文件备份、\">\n",
      "\t\t<meta name=\"author\" content=\"布啦豆\">\n",
      "\n",
      "\t\t<meta name=\"baidu_union_verify\" content=\"d4634268483255495b43735addb956bf\">\n",
      "\n",
      "        <meta property=\"og:type\" content=\"article\">\n",
      "        <meta property=\"og:image\" content=\"http://upload-images.jianshu.io/upload_images/174489-0434c2855ab98c3a.JPG?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240\">\n",
      "        <meta property=\"og:url\" content=\"http://spbeen.com/course/Python3%E6%95%99%E7%A8%8B/\">\n",
      "        <meta property=\"og:title\" content=\"Python3.x教程\">\n",
      "        <meta property=\"og:description\" content=\"小白的Python3快速入门教程\">\n",
      "\n",
      "\n",
      "\t\t<link rel=\"shortcut icon\" href=\"http://spbeen.oss-cn-qingdao.aliyuncs.com/static%2Ffavicon.ico\">\n",
      "\n",
      "\t\t<title>Python技术栈 | Spbeen</title>\n",
      "        \n",
      "    \n",
      "\t\t<link rel=\"stylesheet\" type=\"text/css\" href=\"http://spbeen.oss-cn-qingdao.aliyuncs.com/static%2Fwww%2Fcss%2Ftopbottom.css\" />\n",
      "\n",
      "            <script src=\"https://cdn.bootcss.com/vue/2.1.10/vue.js\"></script>\n",
      "\n",
      "\t\t<!--<link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/semantic-ui/2.2.4/semantic.min.css\">-->\n",
      "        <!--<script src=\"https://cdn.jsdelivr.net/semantic-ui/2.2.4/semantic.min.js\"></script>-->\n",
      "\n",
      "        <link href=\"https://cdnjs.cloudflare.com/ajax/libs/semantic-ui/2.4.1/semantic.css\" rel=\"stylesheet\">\n",
      "        <link rel=\"stylesheet\" href=\"http://spbeen.oss-cn-qingdao.aliyuncs.com/static%2Fwww%2Fcss%2Fmarkdown-www.css\" />\n",
      "        <style>\n",
      "        #bread{background-color: white;box-shadow:0px 0px 0px white;color:black;}\n",
      "        #bread a{color:black;}\n",
      "        #mapicon{font-size: 2em;  margin-right: 0.3em;}\n",
      "        </style>\n",
      "\n",
      "\n",
      "            <script src=\"https://cdn.bootcss.com/jquery/3.3.1/jquery.min.js\"></script>\n",
      "\n",
      "            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/semantic-ui/2.4.1/semantic.js\"></script>\n",
      "\n",
      "            <script src=\"https://cdn.bootcss.com/reqwest/2.0.5/reqwest.js\"></script>\n",
      "\n",
      "\n",
      "        \n",
      "\n",
      "\n",
      "\t\t<!--<script>(function(i,s,o,g,r,a,m){i[\"DaoVoiceObject\"]=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;a.charset=\"utf-8\";m.parentNode.insertBefore(a,m)})(window,document,\"script\",('https:' == document.location.protocol ? 'https:' : 'http:') + \"//widget.daovoice.io/widget/c824b807.js\",\"daovoice\")</script>-->\n",
      "\t\t<!--<script>-->\n",
      "\t\t<!--daovoice('init', {-->\n",
      "\t\t  <!--app_id: \"c824b807\"-->\n",
      "\t\t<!--});-->\n",
      "\t\t<!--daovoice('update');-->\n",
      "\t\t<!--</script>-->\n",
      "\t</head>\n",
      "\n",
      "\t<body>\n",
      "    \n",
      "\n",
      "\n",
      "    <div class=\"ui sidebar inverted vertical menu\">\n",
      "\n",
      "        <a href=\"/\" class=\"item bigfont\">\n",
      "            Spbeen <i class=\"home icon\"></i>\n",
      "        </a>\n",
      "        <a class=\"item\" href=\"/index\">\n",
      "          <i class=\"book icon\"></i>教程\n",
      "        </a>\n",
      "        <a class=\"item\" href=\"/index\">\n",
      "          <i class=\"book icon\"></i>项目\n",
      "        </a>\n",
      "\n",
      "\n",
      "\n",
      "    </div>\n",
      "    \n",
      "\n",
      "        <div class=\"pusher\">\n",
      "            <div class=\"ui grid\">\n",
      "                <div class=\"ui computer only sixteen wide computer column \">\n",
      "                    \n",
      "                    <div class=\"ui inverted teal borderless menu noradius\">\n",
      "                        <div class=\"ui container\">\n",
      "                            <a class=\"header item bigfont\" href=\"/\">\n",
      "                                SpBeen\n",
      "                            </a>\n",
      "\n",
      "                            <div class=\"ui simple dropdown item\" >\n",
      "                                <div class=\"text\">教程</div><i class=\"dropdown icon\"></i>\n",
      "                                <div class=\"menu\">\n",
      "                                    <a class=\"item\" href=\"/course/Python3教程\">Python3教程</a>\n",
      "                                    <a class=\"item\" href=\"/course/xpath教程/\" >Xpath教程</a>\n",
      "                                    <a class=\"item\" href=\"/course/Linux常用命令/\" >Linux常用命令</a>\n",
      "                                </div>\n",
      "                            </div>\n",
      "\n",
      "\n",
      "                            <div class=\"ui simple dropdown item\" >\n",
      "                                <div class=\"text\">项目</div><i class=\"dropdown icon\"></i>\n",
      "                                <div class=\"menu\">\n",
      "                                    <a class=\"item\" href=\"/item/基于Flask的微信娱乐机器人\">基于Flask的微信娱乐机器人</a>\n",
      "                                    <a class=\"item\" href=\"/item/基于Python的网络爬虫\">基于Python的网络爬虫</a>\n",
      "                                    <a class=\"item\" href=\"/item/基于 Python 的文件备份\">基于 Python 的文件备份</a>\n",
      "                                </div>\n",
      "                            </div>\n",
      "\n",
      "\n",
      "                            <div class=\"ui simple dropdown item\" >\n",
      "                                <div class=\"text\">工具</div><i class=\"dropdown icon\"></i>\n",
      "                                <div class=\"menu\">\n",
      "                                    <a class=\"item\" href=\"/tool/request_info/\">IP请求信息查询</a>\n",
      "                                </div>\n",
      "                            </div>\n",
      "                        </div>\n",
      "                    </div>\n",
      "                    \n",
      "                </div>\n",
      "                <div class=\"ui mobile only tablet only sixteen wide mobile sixteen wide tablet column\">\n",
      "                    <div class=\"ui inverted teal borderless menu noradius mobile-tablet\">\n",
      "                        <a id=\"sidebar\"  class=\"item\"><i class=\"bigger sidebar icon\"></i></a>\n",
      "\n",
      "                        <a href=\"/\" class=\"right item bigfont mobile-tablet-item\">Spbeen</a>\n",
      "\n",
      "                        <!--</div>-->\n",
      "                    </div>\n",
      "                </div>\n",
      "            </div>\n",
      "\n",
      "        <div class=\"ui container\">\n",
      "            <div id=\"bread\" class=\"ui icon message\">\n",
      "\n",
      "                    <i id=\"mapicon\" class=\"location arrow icon\"></i>\n",
      "                    <div class=\"content\">\n",
      "                    <p>\n",
      "                        <div class=\"ui breadcrumb\">\n",
      "\n",
      "\n",
      "                            \n",
      "                                <a class=\"section\" href=\"/\">Spbeen</a>\n",
      "                                <div class=\"divider\"> > </div>\n",
      "                            \n",
      "\n",
      "                        </div>\n",
      "                    </p>\n",
      "                    </div>\n",
      "\n",
      "\n",
      "            </div>\n",
      "        </div>\n",
      "\n",
      "            <div class=\"content\">\n",
      "                \n",
      "\n",
      "<div class=\"ui container\">\n",
      "\n",
      "    <div class=\"ui yellow message\">\n",
      "        <ul class=\"ui list\">\n",
      "            <li>User Agent：请求的代理信息</li>\n",
      "            <li>IP：请求的IP地址</li>\n",
      "            <li>Request URL：请求的访问路由</li>\n",
      "            <li>Accept Languages：请求所支持的语言</li>\n",
      "            <li>Accept Encoding：请求所支持的编码格式</li>\n",
      "            <!--<li>Cookies：请求的Cookies</li>-->\n",
      "        </ul>\n",
      "    </div>\n",
      "\n",
      "    <div class=\"ui red segment\">\n",
      "        <div class=\"ui top left attached label\">User-Agent</div>\n",
      "        <div class=\"container\">\n",
      "            python-requests/2.22.0\n",
      "        </div>\n",
      "    </div>\n",
      "    <div class=\"ui blue segment\">\n",
      "        <div class=\"ui top left attached label\">IP Address</div>\n",
      "        <div class=\"container\">\n",
      "            114.95.120.147\n",
      "        </div>\n",
      "    </div>\n",
      "    <div class=\"ui green  segment\">\n",
      "        <div class=\"ui top left attached label\">Request URL</div>\n",
      "        <div class=\"container\">\n",
      "            www.spbeen.com/tool/request_info/\n",
      "        </div>\n",
      "    </div>\n",
      "    <div class=\"ui yellow  segment\">\n",
      "        <div class=\"ui top left attached label\">Accept Languages</div>\n",
      "        <div class=\"container\">\n",
      "            NOT GET\n",
      "        </div>\n",
      "    </div>\n",
      "    <div class=\"ui pink  segment\">\n",
      "        <div class=\"ui top left attached label\">Accept Encoding</div>\n",
      "        <div class=\"container\">\n",
      "            gzip, deflate\n",
      "        </div>\n",
      "    </div>\n",
      "</div>\n",
      "\n",
      "            </div>\n",
      "\n",
      "            \n",
      "            <div class=\"ui inverted teal segment noradius\">\n",
      "                <div class=\"ui container\">\n",
      "\n",
      "                   Copyright @2016-2017 | 赣ICP备16003025号 <script type=\"text/javascript\">var cnzz_protocol = ((\"https:\" == document.location.protocol) ? \" https://\" : \" http://\");document.write(unescape(\"%3Cspan id='cnzz_stat_icon_1260149661'%3E%3C/span%3E%3Cscript src='\" + cnzz_protocol + \"s11.cnzz.com/z_stat.php%3Fid%3D1260149661' type='text/javascript'%3E%3C/script%3E\"));</script>\n",
      "\n",
      "\n",
      "                </div>\n",
      "            </div>\n",
      "\n",
      "            \n",
      "            \n",
      "            <script>\n",
      "                $(document).ready(function(){\n",
      "                    $(\".content table\").addClass(\"ui celled striped table\");\n",
      "\n",
      "                    $(\"#sidebar\").click(function(){\n",
      "                      $('.ui.sidebar').sidebar('toggle');\n",
      "                    });\n",
      "\n",
      "                });\n",
      "            </script>\n",
      "            \n",
      "\n",
      "    </div>\n",
      "\t</body>\n",
      "\n",
      "</html>\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "\n",
    "# This page reports back the User-Agent, IP and headers of the incoming request.\n",
    "url = 'http://spbeen.com/tool/request_info/'\n",
    "# requests has no default timeout; without one an unresponsive server blocks this cell forever.\n",
    "resp = requests.get(url, timeout=10)\n",
    "# Raise on a 4xx/5xx reply so later cells never parse an error page.\n",
    "resp.raise_for_status()\n",
    "print(resp.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "\n",
    "# Build the parse tree with Python's built-in parser (no extra package required).\n",
    "soup1 = BeautifulSoup(markup=resp.text, features='html.parser')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'bs4.BeautifulSoup'>\n"
     ]
    }
   ],
   "source": [
    "# Show which class the parsed document is an instance of.\n",
    "print(soup1.__class__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<title>Python技术栈 | Spbeen</title>\n"
     ]
    }
   ],
   "source": [
    "# First <title> element in the document, printed as markup.\n",
    "print(soup1.find('title'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<a class=\"item bigfont\" href=\"/\">\n",
      "            Spbeen <i class=\"home icon\"></i>\n",
      "</a>\n"
     ]
    }
   ],
   "source": [
    "# Attribute access returns the first <a> tag, the same element find('a') gives.\n",
    "print(soup1.a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<title>Python技术栈 | Spbeen</title>\n",
      "<a class=\"item bigfont\" href=\"/\">\n",
      "            Spbeen <i class=\"home icon\"></i>\n",
      "</a>\n"
     ]
    }
   ],
   "source": [
    "# Re-parse the same response, this time with the lxml parser.\n",
    "soup2 = BeautifulSoup(resp.text, features='lxml')\n",
    "print(soup2.title, soup2.find('a'), sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<title>Python技术栈 | Spbeen</title>\n",
      "<a class=\"item bigfont\" href=\"/\">\n",
      "            Spbeen <i class=\"home icon\"></i>\n",
      "</a>\n"
     ]
    }
   ],
   "source": [
    "# Third parser: html5lib (pure Python, parses the way a web browser does).\n",
    "# It needs the html5lib package to be installed.\n",
    "soup3 = BeautifulSoup(resp.text, 'html5lib')\n",
    "print(soup3.title)\n",
    "print(soup3.find('a'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
